#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import requests
import urllib3
import random
from lxml import etree
import time
from selenium import webdriver
from selenium.webdriver.common.by import By

# The requests in this script are sent with verify=False (TLS certificate
# checks disabled), so silence the InsecureRequestWarning that urllib3 would
# otherwise emit for them.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def get_detail_url(url, timeout=10):
    """Collect the detail-page URLs found on one listing page.

    The page at ``url`` is fetched first. If it contains an
    ``<a class="btn-redir">`` link, that link is followed and the page it
    points to is parsed instead. Every anchor under
    ``div.shop_list.shop_list_4 > dl > dt`` that carries both an ``href`` and
    a ``data_channel`` attribute is turned into an absolute URL of the form
    ``https://lz.esf.fang.com<href>?channel=<data_channel>``.

    Args:
        url: Address of the listing page to scrape.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The list of detail URLs found on the page (possibly empty).

    Raises:
        requests.RequestException: If a request fails or times out.

    Side effects:
        The URLs are also appended to the module-level list ``house``, which
        must exist before this function is called.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}

    # Without a timeout, requests may block indefinitely on a stalled server.
    r = requests.get(url, headers=headers, verify=False, timeout=timeout)
    html = etree.HTML(r.text)

    # The first response may be an interstitial page; its "btn-redir" link
    # holds the address of the real listing page.
    redirect_links = html.xpath('//a[@class="btn-redir"]/@href')
    if redirect_links:
        real_url = redirect_links[0]
        r = requests.get(real_url, headers=headers, verify=False, timeout=timeout)
        html = etree.HTML(r.text)
    # NOTE(review): when no redirect link is present we parse the first
    # response as-is, assuming it already is the listing page -- confirm.

    # Read both attributes from the same anchor so an element missing one of
    # them cannot shift the href/channel pairing of the elements after it.
    next_urls = []
    for anchor in html.xpath('//div[@class="shop_list shop_list_4"]/dl/dt/a'):
        href = anchor.get('href')
        channel = anchor.get('data_channel')
        if href is None or channel is None:
            continue
        next_urls.append('https://lz.esf.fang.com' + href + '?channel=' + channel)

    house.extend(next_urls)
    return next_urls

if __name__ == '__main__':

    # get_detail_url() appends its results to this module-level list, so the
    # name `house` must be kept.
    house = []
    for page in range(1, 100):
        print('--------------------------------')
        print(f'开始爬取第{page}页')
        url = f'https://lz.esf.fang.com/house/i3{page}/'
        try:
            get_detail_url(url)
        except (requests.RequestException, IndexError) as exc:
            # One bad page (network error, or a page without the expected
            # redirect link) must not discard the URLs already collected.
            print(f'第{page}页爬取失败：{exc}')
    print('爬取结束！')

    # The context manager closes the file even if a write fails.
    with open('urls.txt', 'a+', encoding='utf8') as f:
        f.writelines(detail_url + '\n' for detail_url in house)